{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import svm\n",
    "from svmutil import *\n",
    "import os\n",
    "import re\n",
    "import numpy as np\n",
    "from numpy import linalg as LA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def match_featfile(featfile):    #定义一个判断是否为特征文件的函数\n",
    "    regex = re.compile(r\"fc6-1\")    #匹配fc6-1\n",
    "    if re.search(regex,featfile) == None:    #如果没有匹配到\n",
    "        return False\n",
    "#match_featfile('000121.fc6-1')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def select_featfile(feat_files):    #定义一个筛选特征文件的函数\n",
    "    for item in feat_files:\n",
    "        if match_featfile(item) == False:\n",
    "            feat_files.remove(item)           \n",
    "    return feat_files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def get_label(trainlabel_file):    #定义一个从文件中提取label，生成list的函数\n",
    "    label_list=[]     #建立一个list，用来存放label\n",
    "    path_list = readpath_without_enter(trainlabel_file)    #读取到去掉回车符的每行数据\n",
    "    return path_list\n",
    "#get_label(\"trainlabel.txt\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def readpath_without_enter(routefile):    #定义一个读取路径并去掉末尾回车符的函数\n",
    "    fopen = open(routefile,\"r\")    #打开路径清单\n",
    "    path_list =  fopen.readlines()    #将每条路径都读取到一个list中，这里的每条路径字符串最后有一个回车符，需要处理掉\n",
    "    fopen.close()    #关闭文件\n",
    "    pathlist=[]    #建立一个空的list，准备用其存储每条路径\n",
    "    for each in path_list:    #遍历每一个路径字符串，去掉末尾的回车符\\n\n",
    "        pathlist.append(each.replace(\"\\n\",\"\"))    #去掉回车符\n",
    "    return pathlist"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def read_binary_bolb(feature_path):    #定义一个读取特征文件的函数，返回特定格式的数据\n",
    "    fid = open(feature_path,'rb')      #打开特征文件\n",
    "    s = np.fromfile(fid, np.int32)[0:5]    #用np方式，int32 读取特征文件\n",
    "    fid.close()     #关闭文件\n",
    "    m = s[0] * s[1] * s[2] * s[3] * s[4]    #得到前五项的乘积\n",
    "    fid = open(feature_path,'rb')    #以二进制读取方式打开特征文件\n",
    "    data = np.fromfile(fid,np.float32)[5:m+5]    #用np方式，float32打开，这里的float与matlab中转换的不同\n",
    "    data = list(data)    #将数据转换成list格式\n",
    "    fid.close()\n",
    "    return data    #返回的数据是 长度为4096的列表，代表一个特征文件的特征"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def create_svm_input_data(trainroute_file):    #定义一个创建svm输入数据矩阵的函数\n",
    "    dirlist=readpath_without_enter(trainroute_file)    #读取路径\n",
    "    dim_feat = 4096     #维度\n",
    "    data_matrix = np.zeros((len(dirlist),dim_feat))    #建立一个全0矩阵\n",
    "    for item in range(len(dirlist)):     #对于每一条路径中的数据进行遍历\n",
    "        data = np.load(\"%s/c3d_fc6.npy\" % (dirlist[item]))    #读取到每一个c3d_fc6.npy文件\n",
    "        normed_data = data / LA.norm(data,2)    #将一个视频提到的多帧的特征（每个帧对应一帧的特征）求二范数，然后归一化\n",
    "        data_matrix[item,:] = normed_data    #循环将所有的值存入feat中\n",
    "    return data_matrix     #大小为   视频样本个数 * 4096 ，即每个视频最后有  4096 * 1 的特征 \n",
    "#create_svm_input_data(\"trainroute.txt\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def read_c3d_feat(routefile):    #定义一个读取C3D特征的函数\n",
    "    dirlist=readpath_without_enter(routefile)\n",
    "    dim_feat = 4096     #维度\n",
    "    for item in range(len(dirlist)):    #对于每一个路径，其目录下存放了一个视频提帧后每个帧的特征，\n",
    "                                        #这个循环用来计算每个视频的特征，最后得到一个c3d_fc6.npy文件\n",
    "        feat_files = os.listdir(dirlist[item])  #讲一个视频的所有特征路径存储到列表\n",
    "        select_featfile(feat_files)    #筛选出特征文件\n",
    "        feat_files.sort()    #排序\n",
    "        num_feat = len(feat_files)    #计算该视频提取到的特征个数\n",
    "        feat = np.zeros((num_feat,dim_feat))    #建立一个全0矩阵\n",
    "        i = 0\n",
    "        for feature in feat_files:    #读取所有的特征文件，将他们的值读取到一个变量“feat”中\n",
    "            feat_path = os.path.join('%s/%s'%(dirlist[item],feature))\n",
    "            feat[i,:] = read_binary_bolb(feat_path)     #循环将所有的值存入feat中 \n",
    "            i += 1       #循环结束后feat的大小是 特征文件个数*4096\n",
    "        avg_feat = np.mean(feat,axis=0)    #求均值降维，例如   特征文件个数*4096-->1*4096 \n",
    "        avg_feat_float32 = np.float32(avg_feat)    #保证svm的输入为浮点数\n",
    "        np.save(\"%s/c3d_fc6.npy\" % (dirlist[item]),avg_feat_float32)   #将数据得到特征变量存到c3d_fc6.npy文件中"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#create_libsvm_fotrmat_data， \"[label] valuekey1:value valuekey2:value valuekey3:value......\"\n",
    "def create_libsvm_format_data(route_file,label_file,train_or_test_data=\"data.txt\"):    #定义一个将data和label转换为libsvm格式的数据的函数\n",
    "    data = create_svm_input_data(route_file)    #读取数据，尺寸 =“样本数”* 4096 ，4096是C3D提取到的特征维数\n",
    "    label = get_label(label_file)     #得到数据的标签\n",
    "    fopen = open(train_or_test_data,'w')    #打开一个文件，准备写入libsvm格式数据\n",
    "    charater_size = np.shape(data)[-1]    #求得每个特征的属性个数\n",
    "    datalist = []\n",
    "    for item in range(len(label)):     #对于每一个样本编上序号1-len（label）\n",
    "        datalist.append(label[item])    #记录label\n",
    "        for charater in range(charater_size):    #对于每一个特征\n",
    "            datalist.append(\" %s:%s\" %(charater+1,data[item,charater])) #记录valuekey和value，value从data中读取\n",
    "        datalist.append(\"\\n\")    #每条数据结尾加上回车符\n",
    "    fopen.writelines(datalist)    #将记录到的每条数据都写入文件\n",
    "    fopen.close()    #关闭文件\n",
    "    #print(datalist)\n",
    "def create_train_data(traindata_file):    #定义一个创建libsvm格式的训练数据\n",
    "    create_libsvm_format_data(\"trainroute.txt\",\"trainlabel.txt\",traindata_file)\n",
    "def create_test_data(testdata_file):       #定义一个创建libsvm格式的测试数据\n",
    "    create_libsvm_format_data(\"testroute.txt\",\"testlabel.txt\",testdata_file)\n",
    "#定义一个创建libsvm格式的数据，自定义“数据路径文件名”，“数据标签文件名”，“即将生成的libsvm格式数据文件名”\n",
    "def create_single_test_data(route_file,labelfile,svm_format_data):    #定义一个创建libsvm格式的数据\n",
    "    create_libsvm_format_data(route_file,labelfile,svm_format_data)  \n",
    "#create_train_data(\"traindata.txt\")\n",
    "#create_test_data(\"testdata.txt\")\n",
    "#create_DIY_test_data(\"数据路径文件名\",\"数据标签文件名\",\"要生成的libsvm格式数据文件\"),默认均为\"diy_xxxxx.txt\"\n",
    "#create_DIY_data(\"testroute.txt\",\"testlabel.txt\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#train_svm\n",
    "def train_svm():    #定义一个训练SVM的函数\n",
    "    read_c3d_feat(\"trainroute.txt\")\n",
    "    create_train_data(\"traindata.txt\")         #创建libsvm格式的训练数据\n",
    "    y,x = svm_read_problem(\"traindata.txt\")    #调用svm库读取数据\n",
    "    model = svm_train(y,x,'-c 5')    #训练svm模型\n",
    "    return model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#test_svm\n",
    "def single_test_svm(model,route_file=\"showroute.txt\",labelfile=\"showlabel.txt\",svm_format_data=\"showdata.txt\"):    #定义一个用svm_model测试的函数\n",
    "    #model = train_svm()     #即时训练\n",
    "    read_c3d_feat(route_file)    #读取测试样本的特征\n",
    "    create_single_test_data(route_file,labelfile,svm_format_data)    #创建libsvm格式的单个测试数据\n",
    "    y,x = svm_read_problem(svm_format_data)    #libsvm读取数据\n",
    "    label,acc,val = svm_predict(y,x,model)    #得到svm预测的结果\n",
    "    label = int(label[0]) + 1\n",
    "    print(\"Classified!\")\n",
    "    print(\"The video label is:\",label)\n",
    "#single_test_svm()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def batch_test_svm(model,route_file=\"testroute.txt\",labelfile=\"testlabel.txt\",svm_format_data=\"testdata.txt\"):    #定义一个用svm_model测试的函数\n",
    "    #model = train_svm()     #即时训练\n",
    "    read_c3d_feat(route_file)    #读取测试样本的特征\n",
    "    create_test_data(svm_format_data)    #创建libsvm格式的批量测试数据\n",
    "    y,x = svm_read_problem(svm_format_data)    #libsvm读取数据\n",
    "    label,acc,val = svm_predict(y,x,model)    #得到svm预测的结果\n",
    "    #label = int(label[0]) + 1\n",
    "    print(\"Classified!\")\n",
    "    #print(\"The video label is:\",label)\n",
    "    print(\"The accracy is:\",acc[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "model = train_svm()     #训练"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy = 96.8571% (339/350) (classification)\n",
      "Classified!\n",
      "The accracy is: 96.85714285714286\n"
     ]
    }
   ],
   "source": [
    "###单个测试调用此函数，调用前确保testroute.txt 和 testlabel.txt 中内容正确\n",
    "### single_test_svm(model,route_file=\"testroute.txt\",labelfile=\"testlabel.txt\",svm_format_data=\"testdata.txt\")，将会返回这批数据的准确率\n",
    "batch_test_svm(model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "###单个测试调用此函数，调用前确保showroute.txt 和 showlabel.txt 中内容正确\n",
    "### single_test_svm(model,route_file=\"showroute.txt\",labelfile=\"showlabel.txt\",svm_format_data=\"showdata.txt\")，将会返回预测的类标\n",
    "#single_test_svm(model,\"showroute.txt\",\"showlabel.txt\")  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
